library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.2.1 ✔ dplyr 1.1.2
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(leaflet)
A time series line graph, with date along the bottom (month or quarter, tbc), and KPI measure on y-axis. From 2018(or earlier) until now. requires data cleaning step to convert month/quarter to datetime class
Including a line for “all health boards” - tbc whether an average or aggregate for the KPI. requires data wrangling step to make this summary data We could make a static plot of all hbs to display alongside an interactive plot to select HBs; such that all HBs is not needed in the input selector.
With a filter for health board (so we can filter for 1 or multi-select for 1+).
The following three datasets collect data about number of hospital admissions from 2020 to 2023. The number of hospital admissions is here registered weekly. For each of them, we have informations about hb, weekly hospital admissions and average and percentage of hospital admissions during the precovid (period 2018/2019). In addition to that, each of the three files contains informations respectively about demographics of patients, simd quintile and specialty.
ha_demo <- read_csv("../data/covid/hospital_admissions/hospital_admissions_hb_agesex_20230706.csv") %>%
clean_names()
## Rows: 74164 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): HB, HBQF, AgeGroup, AgeGroupQF, Sex, SexQF, AdmissionType, Admissio...
## dbl (4): WeekEnding, NumberAdmissions, Average20182019, PercentVariation
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ha_simd <- read_csv("../data/covid/hospital_admissions/hospital_admissions_hb_simd_20230706.csv") %>%
clean_names()
## Rows: 36408 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): HB, HBQF, AdmissionType, AdmissionTypeQF
## dbl (5): WeekEnding, SIMDQuintile, NumberAdmissions, Average20182019, Percen...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ha_specialty <- read_csv("../data/covid/hospital_admissions/hospital_admissions_hb_specialty_20230706.csv") %>%
clean_names()
## Rows: 66039 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): HB, HBQF, AdmissionType, AdmissionTypeQF, Specialty, SpecialtyQF
## dbl (4): WeekEnding, NumberAdmissions, Average20182019, PercentVariation
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
First of all, I create a column called ‘month_ending_date’, which I want have a data format and contains simply the month, in this way I will be able to group the data by month and make the dataset sìconsistent with the other datasets.
ha_demo <- ha_demo %>%
mutate(month_ending_date = ym(str_sub(as.character(week_ending), start = 1, end = 6)), .after = week_ending) %>%
mutate(week_ending = ymd(week_ending))
ha_simd <- ha_simd %>%
mutate(month_ending_date = ym(str_sub(as.character(week_ending), start = 1, end = 6)), .after = week_ending) %>%
mutate(week_ending = ymd(week_ending))
#Since, the heal board code 'Scotland' correspond to the entire Scotland, I am going to mutate it in 'Scotland' for clarity.
#Finally, I am going to create a new column for dividing the age_groups into different age groups I am more interested in.
ha_demo <- ha_demo %>%
mutate(age = case_when(
age_group == "Under 5"~ "Under 5",
age_group %in% c("15 - 44", "45 - 64")~"5 - 64",
age_group %in% c("65 - 74", "75 - 84", "85 and over")~"over 65",
age_group == "5 - 14"~ "5 - 64",
age_group == "All ages"~"All ages"
),
.after = age_group)
ha_demo %>%
filter(hb %in% c("S08000015", "S92000003")) %>%
group_by(hb, month_ending_date) %>%
summarise(mean_admissions = mean(number_admissions)) %>%
ggplot() +
aes(x = month_ending_date, y = mean_admissions, group = hb, colour = hb) +
geom_line()
## `summarise()` has grouped output by 'hb'. You can override using the `.groups`
## argument.
map <- read_csv("../data/cleaned_data/hospital_locations_clean.csv") %>%
clean_names()
## Rows: 277 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): Location, LocationName, Postcode, AddressLine, HB, HSCP, HSCPQF, C...
## dbl (4): XCoordinate, YCoordinate, longitude, latitude
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ha_demo %>%
group_by(hb) %>%
summarise(mean_adm = mean(number_admissions)) %>%
ggplot() +
aes(x = hb, y = mean_adm, fill = hb) +
geom_col()
ha_demo %>%
filter(month_ending_date == "2023-06-01") %>%
group_by(hb) %>%
summarise(mean_adm = mean(number_admissions)) %>%
ggplot() +
aes(x = hb, y = mean_adm, fill = hb) +
geom_col()
ha_for_join <- ha_demo %>%
group_by(hb) %>%
summarise(mean_adm = mean(number_admissions))
#join <- left_join(ha_demo, map, by = "hb")
join_ha_map <- full_join(map, ha_for_join, by = "hb")
#Cleaning join dataset
join_ha_map %>%
leaflet() %>%
addProviderTiles(providers$Stamen.TonerLite) %>%
addCircleMarkers(lng = ~ longitude,
lat = ~ latitude,
weight = 0,
fillColor = ~colorNumeric('RdYlGn', mean_adm)
(mean_adm),
fillOpacity = 0.9
#popup = ~ paste( br(), "Board:", HB, br(), round(mean_diff, 0))
)
## Warning in validateCoords(lng, lat, funcName): Data contains 2 rows with either
## missing or invalid lat/lon values and will be ignored
ha_demo %>%
arrange(desc(week_ending))
ha_simd %>%
filter(hb %in% c("S08000015")) %>%
group_by(simd_quintile, month_ending_date) %>%
summarise(mean_admissions = mean(number_admissions)) %>%
ggplot() +
aes(x = month_ending_date, y = mean_admissions, group = simd_quintile, colour = simd_quintile) +
geom_line()
## `summarise()` has grouped output by 'simd_quintile'. You can override using the
## `.groups` argument.
ha_demo %>%
filter(hb %in% c("S92000003"), age_group != "All ages") %>%
group_by(age_group, month_ending_date) %>%
summarise(mean_admissions = mean(number_admissions)) %>%
ggplot() +
aes(x = month_ending_date, y = mean_admissions, group = age_group, colour = age_group) +
geom_line() +
labs(x = "\nYear",
y = "average monthly hospital admissions\n")
## `summarise()` has grouped output by 'age_group'. You can override using the
## `.groups` argument.
ha_demo %>%
filter(hb %in% c("S08000015"), age != "All ages") %>%
group_by(age, month_ending_date) %>%
summarise(mean_admissions = mean(number_admissions)) %>%
ggplot() +
aes(x = month_ending_date, y = mean_admissions, group = age, colour = age) +
geom_line()
## `summarise()` has grouped output by 'age'. You can override using the `.groups`
## argument.
Starting point: Chiara work on covid/hospital_admissions data
Starting point: analyses/naomi_exploring_inpatient.Rmd
# load in data (deprivation - also avail: age/sex, specialty)
inpatients_deprivation <- read_csv("../data/inpatient/inpatient_and_daycase_by_nhs_board_of_treatment_and_simd.csv") %>%
clean_names() %>%
mutate(quarter = zoo::as.yearqtr(quarter))
## Rows: 40632 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, AdmissionType,...
## dbl (7): SIMD, Episodes, LengthOfEpisode, AverageLengthOfEpisode, Stays, Le...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# generate stays data for all health boards to compare individual hbs to
all_hbs_stays <- inpatients_deprivation %>%
group_by(quarter) %>%
summarise(stays = sum(stays),
length_of_stay = sum(length_of_stay),
average_length_of_stay = length_of_stay / stays) %>%
mutate(hb = "All health boards", .after = quarter)
# select same columns from original df
stays_per_hb <- inpatients_deprivation %>%
# check group by
group_by(hb, quarter) %>%
summarise(stays = sum(stays),
length_of_stay = sum(length_of_stay),
average_length_of_stay = length_of_stay / stays) %>%
select(quarter, hb, stays, length_of_stay, average_length_of_stay)
## `summarise()` has grouped output by 'hb'. You can override using the `.groups`
## argument.
# combine all_hbs data with individual hbs data
stays <- bind_rows(all_hbs_stays, stays_per_hb)
# plot ave_length_of_stay over time with hb filter
# assign to plot_ts_length_stay_filter_hb
# note scale all health boards == individual HB so keep on same graph
stays %>%
# filter for multi-select - replace S08000015 with input selector for hb
filter(hb %in% c("All health boards","S08000015")) %>%
ggplot() +
aes(x = quarter, y = average_length_of_stay, colour = hb) +
geom_line() +
geom_point() +
scale_colour_brewer(type = "qual", palette = "Set1") +
labs(x = "\nYear quarter", y = "Average length of stay\n",
title = "Length of stay for inpatients and day cases",
colour = "Health board") +
theme(legend.position = "bottom",
panel.background = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
axis.text = element_text(size = 20),
axis.title = element_text(size = 28),
legend.title = element_text(size = 28),
plot.title = element_text(size = 32)
)
Starting point: analyses/thijmen_exploring_beds.Rmd
Naomi - PDA outcomes: 2.5. Exploratory data analysis, 2.6. Statistical analyses to identify patterns, trends and relationships in the dataset, and 2.7. Interpreting the results of statistical analysis
# naomi
# load in data
beds <- read_csv("../data/beds_by_nhs_board_of_treatment_and_specialty.csv") %>%
clean_names() %>%
mutate(quarter = zoo::as.yearqtr(quarter))
## Rows: 30284 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, Specialty, Spe...
## dbl (5): AllStaffedBeddays, TotalOccupiedBeddays, AverageAvailableStaffedBe...
## lgl (4): AllStaffedBeddaysQF, TotalOccupiedBeddaysQF, AverageAvailableStaff...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# generate occupancy data for all health boards to compare individual hbs to
all_hbs_occupancy <- beds %>%
group_by(quarter) %>%
# calculate quarterly proportion (% occupancy) for all hbs
summarise(total_occupied_beddays = sum(total_occupied_beddays),
all_staffed_beddays = sum(all_staffed_beddays)) %>%
mutate(percentage_occupancy = 100 * (total_occupied_beddays / all_staffed_beddays),
hb = "All health boards", .after = quarter)
# select same columns from original df
occupancy_per_hb <- beds %>%
group_by(hb, quarter) %>%
# calculate quarterly proportion (% occupancy) per hbs
summarise(total_occupied_beddays = sum(total_occupied_beddays),
all_staffed_beddays = sum(all_staffed_beddays)) %>%
mutate(percentage_occupancy = 100 * (total_occupied_beddays / all_staffed_beddays))
## `summarise()` has grouped output by 'hb'. You can override using the `.groups`
## argument.
# combine all_hbs data with individual hbs data
occupancy <- bind_rows(all_hbs_occupancy, occupancy_per_hb)
# plot ave_length_of_stay over time with hb filter
# assign to plot_ts_occupancy_filter_hb
# note scale all health boards == individual HB so keep on same graph
occupancy %>%
# filter for multi-select - replace S08000015 with input selector for hb
filter(hb %in% c("All health boards","S08000015", "S08000025")) %>%
ggplot() +
aes(x = quarter, y = percentage_occupancy, colour = hb) +
geom_line() +
geom_point() +
scale_colour_brewer(type = "qual", palette = "Set1") +
labs(x = "\nYear quarter", y = "Percentage occupancy\n",
title = "Percentage occupancy (hospital beds)",
colour = "Health board") #+
# theme(legend.position = "bottom",
# panel.background = element_blank(),
# panel.grid.minor.x = element_blank(),
# panel.grid.minor.y = element_blank(),
# axis.text = element_text(size = 20),
# axis.title = element_text(size = 28),
# legend.title = element_text(size = 28),
# plot.title = element_text(size = 32)
# )
Interpretation: bed occupancy differs by health board, e.g. here we can see it is higher than the national average (red, “All health boards”) for health board 15 (blue) and usually lower for health board 25 (green). All health boards see a dip in occupancy in Q2 2020, due to COVID. The return to pre-covid occupancy levels also seems to differ by health board - this is an interesting one to show on the dashboard.
library(leaflet)
library(shiny)
# load in hospitals data
## hospitals for map
hospitals <- read_csv("../data/cleaned_data/hospital_locations_clean.csv")
## Rows: 277 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): Location, LocationName, Postcode, AddressLine, HB, HSCP, HSCPQF, C...
## dbl (4): XCoordinate, YCoordinate, longitude, latitude
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
### colour palette for hospitals on map
pal <- colorFactor(c("navy", "blue", "steelblue", "skyblue",
"red","indianred", "maroon", "brown",
"springgreen", "springgreen2", "springgreen3", "springgreen4",
"gold", "goldenrod", "yellow", "orange"),
domain = unique(hospitals$HB))
# raw data to use
beds <- read_csv("../data/beds_by_nhs_board_of_treatment_and_specialty.csv") %>%
clean_names() %>%
mutate(quarter = zoo::as.yearqtr(quarter))
## Rows: 30284 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, Specialty, Spe...
## dbl (5): AllStaffedBeddays, TotalOccupiedBeddays, AverageAvailableStaffedBe...
## lgl (4): AllStaffedBeddaysQF, TotalOccupiedBeddaysQF, AverageAvailableStaff...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# data prep script for occupancy heatmap
most_recent_quarter <- beds %>%
arrange(desc(quarter)) %>%
head(1) %>%
pull(quarter)
hospitals_lookup <- hospitals %>%
clean_names() %>%
select(location, location_name, longitude, latitude)
hospital_occupancy <- beds %>%
filter(quarter == most_recent_quarter) %>%
group_by(location, hb) %>%
summarise(total_occupied_beddays = sum(total_occupied_beddays),
all_staffed_beddays = sum(all_staffed_beddays)) %>%
mutate(percentage_occupancy = 100 * (total_occupied_beddays / all_staffed_beddays))
## `summarise()` has grouped output by 'location'. You can override using the
## `.groups` argument.
hospital_location_occupancy <- hospital_occupancy %>%
left_join(hospitals_lookup)
## Joining with `by = join_by(location)`
# show hospitals on map, currently coloured by hb
hospital_location_occupancy %>%
filter(hb == "S08000031") %>%
leaflet() %>%
addTiles() %>%
addCircleMarkers(lng = ~ longitude,
lat = ~ latitude,
weight = 1,
popup = ~ paste(location_name, br(), "Board:", hb),
color = ~ percentage_occupancy
)
## Warning in validateCoords(lng, lat, funcName): Data contains 1 rows with either
## missing or invalid lat/lon values and will be ignored
Starting point: one of Naomi’s notebooks from Tuesday AM
Starting point: one of Chiara/Thijmen’s notebooks from Monday